package com.six.compress.old;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.orc.CompressionKind;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.orc.storage.ql.exec.vector.DoubleColumnVector;
import org.apache.orc.storage.ql.exec.vector.ListColumnVector;
import org.apache.orc.storage.ql.exec.vector.LongColumnVector;
import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * Benchmark notes — test file: 1068 MB.
 * <p>
 * 10000 rows, 28001 float columns.
 * <p>
 * none (no compression):
 *   row 9999: cost=>18614  avg=>1 ms/row
 *   total:    cost=>18653  avg=>1 ms/row
 *   file: 1.1 GB
 * zlib:
 *   row 9999: cost=>88321  avg=>8 ms/row
 *   total:    cost=>87712  avg=>10 ms/row
 *   file: 1.0 GB
 * snappy:
 *   row 9999: cost=>19222  avg=>1 ms/row
 *   total:    cost=>19356  avg=>1 ms/row
 *   file: 1.1 GB
 * <p>
 */
@SuppressWarnings("all")
public class ORCWriterListTest {

    /**
     * Writes an ORC file with schema struct&lt;time:bigint, params:array&lt;float&gt;&gt;
     * (10000 rows, 28000 random floats per row) and prints throughput timings.
     *
     * @param args unused
     * @throws Exception on any write failure (propagated instead of being swallowed,
     *                   so a broken run fails visibly)
     */
    public static void main(String[] args) throws Exception {
        // Define the ORC schema, i.e. the table structure.
        TypeDescription schema = TypeDescription.createStruct();
        schema.addField("time", TypeDescription.createLong());
        schema.addField("params", TypeDescription.createList(TypeDescription.createFloat()));

        // Absolute local path of the output ORC file.
        String lxw_orc1_file = "/home/hdfs/data/fly_param_float_arr_none_1.orc";
        Configuration conf = new Configuration();
        FileSystem.getLocal(conf);

        final int size = 28000;     // floats per row (list length)
        final int max_row = 10000;  // total rows to write
        final int batchRows = 100;  // flush cadence — matches original (i + 1) % 100

        Writer writer = null;
        try {
            writer = OrcFile.createWriter(
                    new Path(lxw_orc1_file),
                    OrcFile.writerOptions(conf)
                            .setSchema(schema)
                            .compress(CompressionKind.NONE)
                            .version(OrcFile.Version.V_0_12)
            );

            Random random = new Random(1000000000);
            long start = System.currentTimeMillis();

            // Size the batch to the flush cadence instead of max_row: only
            // batchRows rows ever accumulate before addRowBatch()/reset().
            VectorizedRowBatch batch = schema.createRowBatch(batchRows);
            LongColumnVector time = (LongColumnVector) batch.cols[0];
            ListColumnVector paramVec = (ListColumnVector) batch.cols[1];
            DoubleColumnVector ndsFloat = (DoubleColumnVector) paramVec.child;
            // Pre-size the shared child vector once for a full batch. The old
            // code reallocated ndsFloat.vector every row, clobbering the list
            // data of all earlier rows in the batch.
            ndsFloat.ensureSize(batchRows * size, false);

            for (int i = 0; i < max_row; i++) {
                int row = batch.size++;

                // BUG FIX: fill() marks the column repeating and overwrites
                // every earlier row in the batch; set only this row's slot.
                time.vector[row] = start + i;

                // BUG FIX: each list starts where the previous one ended. The
                // old code never set offsets (all lists pointed at offset 0)
                // and reset childCount instead of accumulating it.
                paramVec.offsets[row] = paramVec.childCount;
                paramVec.lengths[row] = size;
                for (int j = 0; j < size; j++) {
                    ndsFloat.vector[paramVec.childCount + j] = random.nextFloat();
                }
                paramVec.childCount += size;

                if (batch.size == batchRows) {
                    writer.addRowBatch(batch);
                    batch.reset(); // also resets childCount/offsets for the next batch
                    long end = System.currentTimeMillis();
                    System.out.println("当前:第=>" + i + "条" + " cost=>" + (end - start) + "  avg=>" + (end - start) / i + "ms/条");
                }
            }
            // Flush any partially filled final batch (empty when max_row is a
            // multiple of batchRows — the old code re-added an empty batch).
            if (batch.size > 0) {
                writer.addRowBatch(batch);
            }
            long e = System.currentTimeMillis();
            System.out.println("cost=>" + (e - start) + "  avg=>" + (e - start) / max_row + "ms/条");
        } finally {
            // BUG FIX: close in finally so the ORC footer is written (and the
            // handle released) even when an error interrupts the loop; the old
            // code closed only on the success path and swallowed exceptions.
            if (writer != null) {
                writer.close();
            }
        }
    }

}
